Part 1

# Load libraries

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.4     v dplyr   1.0.7
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   2.0.1     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(ggplot2)
library(dplyr)

Part 1 (a): Reading in Data

df <- read.csv("C:\\Users\\zerva\\Documents\\AM14\\25_Portfolios_5x5.csv", header = TRUE, skip = 15)
df <- df[1:1146,]

names(df)[names(df) == "X"] <- "Date"
df <- as.data.frame(lapply(df, as.numeric))
df$Date <- parse_date_time(df$Date, "ym")

tail(df)
##            Date SMALL.LoBM ME1.BM2 ME1.BM3 ME1.BM4 SMALL.HiBM ME2.BM1 ME2.BM2
## 1141 2021-07-01   -10.1722 -7.6929 -3.6097 -4.7531    -1.5492 -7.4616 -3.2841
## 1142 2021-08-01     2.5319  5.5794  4.0683  1.8348     3.9693  1.9219  2.3791
## 1143 2021-09-01    -5.1961 -4.3855 -3.3635 -0.8425     0.1511 -5.3511 -4.1155
## 1144 2021-10-01     2.0093 -0.3933  1.0377  1.8838     4.1799  2.1078  2.5007
## 1145 2021-11-01    -8.7691 -6.8659 -4.1992 -3.8271    -3.4686 -7.9499 -3.8268
## 1146 2021-12-01    -6.6488  0.3345  1.9093  2.3951     3.3515 -0.2830  0.0193
##      ME2.BM3 ME2.BM4 ME2.BM5 ME3.BM1 ME3.BM2 ME3.BM3 ME3.BM4 ME3.BM5 ME4.BM1
## 1141 -1.7334 -2.1717 -6.0351 -2.5013 -1.1614 -1.0348 -1.1665 -4.5054 -0.7491
## 1142  1.2297  1.5206  2.4331  1.9087  1.4109  1.8187  1.8091  3.5476  3.2077
## 1143 -3.6209 -0.4379  3.3170 -3.8808 -4.0114 -2.8858 -4.3101  1.9898 -4.3556
## 1144  3.8364  4.6983  3.0187  4.7347  3.5965  3.9967  3.9258  4.2605  4.7589
## 1145  0.7390 -3.0060 -4.1636 -8.5724 -2.4178 -2.3881 -2.6836 -3.9083 -6.4840
## 1146  6.0397  6.3250  4.9877  0.1028  4.4464  4.3764  5.2353  4.4539  0.5204
##      ME4.BM2 ME4.BM3 ME4.BM4 ME4.BM5 BIG.LoBM ME5.BM2 ME5.BM3 ME5.BM4 BIG.HiBM
## 1141  1.8909 -1.2862  1.3608 -4.2888   3.3015  3.1933  0.4040 -1.5415  -3.4379
## 1142  1.5669  1.6435  3.2307  5.0117   3.3117  4.1280  0.5554  1.2868   4.3102
## 1143 -5.0464 -2.1698 -4.5869  1.4868  -5.2780 -6.2358 -4.3843 -0.7534   0.6721
## 1144  5.5583  5.4486  4.5498  4.4312   8.9941  5.6401  0.8190  5.5387   7.5280
## 1145 -1.9543 -2.9812 -1.3566 -4.6413   1.2334 -3.0009 -3.0145 -4.5284  -6.6413
## 1146  5.9283  5.3941  6.1848  3.9667   2.1199  5.2625  6.9025  5.8952   1.6612
names(df) <- gsub("\\.", "_", names(df))
names(df)
##  [1] "Date"       "SMALL_LoBM" "ME1_BM2"    "ME1_BM3"    "ME1_BM4"   
##  [6] "SMALL_HiBM" "ME2_BM1"    "ME2_BM2"    "ME2_BM3"    "ME2_BM4"   
## [11] "ME2_BM5"    "ME3_BM1"    "ME3_BM2"    "ME3_BM3"    "ME3_BM4"   
## [16] "ME3_BM5"    "ME4_BM1"    "ME4_BM2"    "ME4_BM3"    "ME4_BM4"   
## [21] "ME4_BM5"    "BIG_LoBM"   "ME5_BM2"    "ME5_BM3"    "ME5_BM4"   
## [26] "BIG_HiBM"
df_FF <- read.csv("C:\\Users\\zerva\\Documents\\AM14\\F-F_Research_Data_Factors.csv", header = TRUE, skip = 3)
df_FF <- df_FF[1:1146,]

names(df_FF)[names(df_FF) == "X"] <- "Date"
names(df_FF)[names(df_FF) == "Mkt.RF"] <- "Mkt_RF"
df_FF <- as.data.frame(lapply(df_FF, as.numeric))
df_FF$Date <- parse_date_time(df_FF$Date, "ym")

tail(df_FF)
##            Date Mkt_RF   SMB   HML   RF
## 1141 2021-07-01   1.27 -3.96 -1.75 0.00
## 1142 2021-08-01   2.90 -0.48 -0.13 0.00
## 1143 2021-09-01  -4.37  0.80  5.09 0.00
## 1144 2021-10-01   6.65 -2.28 -0.44 0.00
## 1145 2021-11-01  -1.55 -1.35 -0.53 0.00
## 1146 2021-12-01   3.10 -1.58  3.22 0.01
df_combined <- left_join(df, df_FF, by = "Date")
tail(df_combined)
##            Date SMALL_LoBM ME1_BM2 ME1_BM3 ME1_BM4 SMALL_HiBM ME2_BM1 ME2_BM2
## 1141 2021-07-01   -10.1722 -7.6929 -3.6097 -4.7531    -1.5492 -7.4616 -3.2841
## 1142 2021-08-01     2.5319  5.5794  4.0683  1.8348     3.9693  1.9219  2.3791
## 1143 2021-09-01    -5.1961 -4.3855 -3.3635 -0.8425     0.1511 -5.3511 -4.1155
## 1144 2021-10-01     2.0093 -0.3933  1.0377  1.8838     4.1799  2.1078  2.5007
## 1145 2021-11-01    -8.7691 -6.8659 -4.1992 -3.8271    -3.4686 -7.9499 -3.8268
## 1146 2021-12-01    -6.6488  0.3345  1.9093  2.3951     3.3515 -0.2830  0.0193
##      ME2_BM3 ME2_BM4 ME2_BM5 ME3_BM1 ME3_BM2 ME3_BM3 ME3_BM4 ME3_BM5 ME4_BM1
## 1141 -1.7334 -2.1717 -6.0351 -2.5013 -1.1614 -1.0348 -1.1665 -4.5054 -0.7491
## 1142  1.2297  1.5206  2.4331  1.9087  1.4109  1.8187  1.8091  3.5476  3.2077
## 1143 -3.6209 -0.4379  3.3170 -3.8808 -4.0114 -2.8858 -4.3101  1.9898 -4.3556
## 1144  3.8364  4.6983  3.0187  4.7347  3.5965  3.9967  3.9258  4.2605  4.7589
## 1145  0.7390 -3.0060 -4.1636 -8.5724 -2.4178 -2.3881 -2.6836 -3.9083 -6.4840
## 1146  6.0397  6.3250  4.9877  0.1028  4.4464  4.3764  5.2353  4.4539  0.5204
##      ME4_BM2 ME4_BM3 ME4_BM4 ME4_BM5 BIG_LoBM ME5_BM2 ME5_BM3 ME5_BM4 BIG_HiBM
## 1141  1.8909 -1.2862  1.3608 -4.2888   3.3015  3.1933  0.4040 -1.5415  -3.4379
## 1142  1.5669  1.6435  3.2307  5.0117   3.3117  4.1280  0.5554  1.2868   4.3102
## 1143 -5.0464 -2.1698 -4.5869  1.4868  -5.2780 -6.2358 -4.3843 -0.7534   0.6721
## 1144  5.5583  5.4486  4.5498  4.4312   8.9941  5.6401  0.8190  5.5387   7.5280
## 1145 -1.9543 -2.9812 -1.3566 -4.6413   1.2334 -3.0009 -3.0145 -4.5284  -6.6413
## 1146  5.9283  5.3941  6.1848  3.9667   2.1199  5.2625  6.9025  5.8952   1.6612
##      Mkt_RF   SMB   HML   RF
## 1141   1.27 -3.96 -1.75 0.00
## 1142   2.90 -0.48 -0.13 0.00
## 1143  -4.37  0.80  5.09 0.00
## 1144   6.65 -2.28 -0.44 0.00
## 1145  -1.55 -1.35 -0.53 0.00
## 1146   3.10 -1.58  3.22 0.01
df_combined <- df_combined %>%
  mutate(MKT_RF_DIFF = Mkt_RF - RF)

Part 1 (b): Run Basic CAPM Regression

names_1 <- c()
r_squared_1 <- c()
realized_return_1 <- c()
pred_return_1 <- c()
reg_1 <- c()
for (i in 2:26) {
  reg_1[[i]] <- summary(lm(as.matrix(df_combined[i]) ~ as.matrix(MKT_RF_DIFF) + as.matrix(RF), data = df_combined))
  names_1[[i]] <- colnames(df_combined[i])
  print(ggplot(data = df_combined, aes(x = Date)) + 
                        geom_line(aes(y = df_combined[ , i] - RF, color="Realized Returns")) + 
                        geom_line(aes(y = as.vector(reg_1[[i]]$coefficients[2,1]) * MKT_RF_DIFF, color="Predicted Returns")) +
                        labs(y = "Excess Returns", colour = "Legend") +
                        ggtitle(names_1[[i]]) +
                        theme(
                        plot.title = element_text(size = 15, face = "bold", hjust = 0.5, vjust = 2.5),
                        axis.title.x = element_text(size = 12, face = "bold", margin = margin(t = 10)),
                        axis.title.y = element_text(size = 12, face = "bold", margin = margin(r = 10)),
                        axis.text.x = element_text(size = 11),
                        axis.text.y = element_text(size = 11),
                        legend.title = element_text(size = 12, face = "bold"),
                        legend.text = element_text(size = 10.5),
                        legend.key.height = unit(1, 'cm'),
                        legend.key.width = unit(1.5, 'cm'),
                        legend.position = "right", legend.margin=margin(l = 30)) +
                        guides(color = guide_legend(override.aes = list(size = 1.5)))
        )
  pred_return_1[[i]] <- mean(reg_1[[i]]$coefficients[2,1] * df_combined$MKT_RF_DIFF)
  realized_return_1[[i]] <- mean(df_combined[ , i] - df_combined$RF)
  r_squared_1[[i]] <- reg_1[[i]]$adj.r.squared
}

pred_return_1 <- as.data.frame(as.matrix(pred_return_1))
pred_return_1 <- pred_return_1[-c(1),]
pred_return_1 <- as.data.frame(as.list(pred_return_1), col.names = names(df_combined[2:26]), row.names = "Predicted_Returns")
realized_return_1 <- as.data.frame(as.matrix(realized_return_1))
realized_return_1 <- realized_return_1[-c(1),]
realized_return_1 <- as.data.frame(as.list(realized_return_1), col.names = names(df_combined[2:26]), row.names = "Realized_Returns")
realized_vs_pred_return_1 <- rbind(pred_return_1, realized_return_1)
realized_vs_pred_return_1 <- as.data.frame(t(realized_vs_pred_return_1))
realized_vs_pred_return_1 <- realized_vs_pred_return_1 %>%
  add_column(Group = c('A', 'B', 'B', 'B', 'A', 'C', 'C', 'C', 'C', 'C', 'D', 'D', 'D', 'D', 'D', 'E', 'E', 'E', 'E', 'E', 'A', 'F', 'F', 'F', 'A'), .after="Realized_Returns")
realized_vs_pred_return_1[,1:2]
##            Predicted_Returns Realized_Returns
## SMALL_LoBM         0.6857270        0.6044773
## ME1_BM2            0.5967542        0.7300789
## ME1_BM3            0.5947202        1.0124432
## ME1_BM4            0.5383386        1.1812958
## SMALL_HiBM         0.5827222        1.3745457
## ME2_BM1            0.5387897        0.6670689
## ME2_BM2            0.5209447        0.9573097
## ME2_BM3            0.5100120        0.9869790
## ME2_BM4            0.5156962        1.0541577
## ME2_BM5            0.5886444        1.2567768
## ME3_BM1            0.5308739        0.7525312
## ME3_BM2            0.4791648        0.9231689
## ME3_BM3            0.4770094        0.9348139
## ME3_BM4            0.4991309        1.0248032
## ME3_BM5            0.5794921        1.1172081
## ME4_BM1            0.4621543        0.7635621
## ME4_BM2            0.4631926        0.8089417
## ME4_BM3            0.4682924        0.8660339
## ME4_BM4            0.4955658        0.9690417
## ME4_BM5            0.6012664        1.0536288
## BIG_LoBM           0.4071054        0.6872679
## ME5_BM2            0.4031932        0.6583507
## ME5_BM3            0.4126699        0.7236165
## ME5_BM4            0.4711381        0.6623302
## BIG_HiBM           0.5579382        0.9480490

Part 1 (c): Plot Predicted Vs Realized Returns

library(ggrepel)
## Warning: package 'ggrepel' was built under R version 4.1.2
ggplot(data = realized_vs_pred_return_1, aes(x = Predicted_Returns, y = Realized_Returns, label = rownames(realized_vs_pred_return_1))) +
  geom_point(size = 2) +
  geom_label_repel(aes(fill = factor(Group)), color = 'white', fontface = 'bold', min.segment.length = 0, segment.color = 'black', show.legend = FALSE) +
  ggtitle("Predicted Vs. Realized Excess Returns") +
  theme(
    plot.title = element_text(size = 22, face = "bold", hjust = 0.5, vjust = 2.5),
    axis.title.x = element_text(size = 14, face = "bold", margin = margin(t = 15)),
    axis.title.y = element_text(size = 14, face = "bold", margin = margin(r = 15)),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11)
    ) +
  geom_abline() +
  scale_x_continuous(breaks = scales::pretty_breaks(n = 20), limits = c(0.0, 0.75)) +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 15), limits = c(0.00, 1.4)) +
  labs(y = "Realized Returns", x = "Predicted Returns")

r_squared_1 <- as.data.frame(as.matrix(r_squared_1))
r_squared_1 <- r_squared_1[-c(1),]
r_squared_1 <- as.data.frame(as.list(r_squared_1), col.names = names(df_combined[2:26]), row.names = "R_Squared_CAPM")

Part 1 (d): Run Fama French Regression Model

names_2 <- c()
r_squared_2 <- c()
realized_return_2 <- c()
pred_return_2 <- c()
reg_2 <- c()
for (i in 2:26) {
  reg_2[[i]] <- summary(lm(as.matrix(df_combined[i]) ~ as.matrix(MKT_RF_DIFF) + as.matrix(SMB) + as.matrix(HML) + as.matrix(RF), data = df_combined))
  names_2[[i]] <- colnames(df_combined[i])
  print(ggplot(data = df_combined, aes(x = Date)) + 
                        geom_line(aes(y = df_combined[ , i] - RF, color="Realized Returns")) + 
                        geom_line(aes(y = as.vector(reg_2[[i]]$coefficients[2,1]) * MKT_RF_DIFF, color="Predicted Returns")) +
                        labs(y = "Excess Returns", colour = "Legend") +
                        ggtitle(names_2[[i]]) +
                        theme(
                        plot.title = element_text(size = 15, face = "bold", hjust = 0.5, vjust = 2.5),
                        axis.title.x = element_text(size = 12, face = "bold", margin = margin(t = 10)),
                        axis.title.y = element_text(size = 12, face = "bold", margin = margin(r = 10)),
                        axis.text.x = element_text(size = 11),
                        axis.text.y = element_text(size = 11),
                        legend.title = element_text(size = 12, face = "bold"),
                        legend.text = element_text(size = 10.5),
                        legend.key.height = unit(1, 'cm'),
                        legend.key.width = unit(1.5, 'cm'),
                        legend.position = "right", legend.margin=margin(l = 30)) +
                        guides(color = guide_legend(override.aes = list(size = 1.5)))
        )
  pred_return_2[[i]] <- mean(reg_2[[i]]$coefficients[2,1] * df_combined$MKT_RF_DIFF)
  realized_return_2[[i]] <- mean(df_combined[ , i] - df_combined$RF)
  r_squared_2[[i]] <- reg_2[[i]]$adj.r.squared
}

pred_return_2 <- as.data.frame(as.matrix(pred_return_2))
pred_return_2 <- pred_return_2[-c(1),]
pred_return_2 <- as.data.frame(as.list(pred_return_2), col.names = names(df_combined[2:26]), row.names = "Predicted_Returns")
realized_return_2 <- as.data.frame(as.matrix(realized_return_2))
realized_return_2 <- realized_return_2[-c(1),]
realized_return_2 <- as.data.frame(as.list(realized_return_2), col.names = names(df_combined[2:26]), row.names = "Realized_Returns")
realized_vs_pred_return_2 <- rbind(pred_return_2, realized_return_2)
realized_vs_pred_return_2 <- as.data.frame(t(realized_vs_pred_return_2))
realized_vs_pred_return_2 <- realized_vs_pred_return_2 %>%
  add_column(Group = c('A', 'B', 'B', 'B', 'A', 'C', 'C', 'C', 'C', 'C', 'D', 'D', 'D', 'D', 'D', 'E', 'E', 'E', 'E', 'E', 'A', 'F', 'F', 'F', 'A'), .after="Realized_Returns")
realized_vs_pred_return_2[,1:2]
##            Predicted_Returns Realized_Returns
## SMALL_LoBM         0.5442463        0.6044773
## ME1_BM2            0.4598130        0.7300789
## ME1_BM3            0.4589947        1.0124432
## ME1_BM4            0.4014335        1.1812958
## SMALL_HiBM         0.4167590        1.3745457
## ME2_BM1            0.4637794        0.6670689
## ME2_BM2            0.4335062        0.9573097
## ME2_BM3            0.4207845        0.9869790
## ME2_BM4            0.4114393        1.0541577
## ME2_BM5            0.4556272        1.2567768
## ME3_BM1            0.4821273        0.7525312
## ME3_BM2            0.4339799        0.9231689
## ME3_BM3            0.4196771        0.9348139
## ME3_BM4            0.4231335        1.0248032
## ME3_BM5            0.4745503        1.1172081
## ME4_BM1            0.4598769        0.7635621
## ME4_BM2            0.4377957        0.8089417
## ME4_BM3            0.4285701        0.8660339
## ME4_BM4            0.4398941        0.9690417
## ME4_BM5            0.5130312        1.0536288
## BIG_LoBM           0.4372281        0.6872679
## ME5_BM2            0.4171068        0.6583507
## ME5_BM3            0.4091017        0.7236165
## ME5_BM4            0.4406138        0.6623302
## BIG_HiBM           0.5015446        0.9480490
ggplot(data = realized_vs_pred_return_2, aes(x = Predicted_Returns, y = Realized_Returns, label = rownames(realized_vs_pred_return_2))) +
  geom_point(size = 2) +
  geom_label_repel(aes(fill = factor(Group)), color = 'white', fontface = 'bold', min.segment.length = 0, segment.color = 'black', show.legend = FALSE) +
  ggtitle("Predicted Vs. Realized Excess Returns") +
  theme(
    plot.title = element_text(size = 22, face = "bold", hjust = 0.5, vjust = 2.5),
    axis.title.x = element_text(size = 14, face = "bold", margin = margin(t = 15)),
    axis.title.y = element_text(size = 14, face = "bold", margin = margin(r = 15)),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11)
    ) +
  geom_abline() +
  scale_x_continuous(breaks = scales::pretty_breaks(n = 20), limits = c(0.0, 0.555)) +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 15), limits = c(0.00, 1.4)) +
  labs(y = "Realized Returns", x = "Predicted Returns") 

r_squared_2 <- as.data.frame(as.matrix(r_squared_2))
r_squared_2 <- r_squared_2[-c(1),]
r_squared_2 <- as.data.frame(as.list(r_squared_2), col.names = names(df_combined[2:26]), row.names = "R_Squared_Fama_French")
CAPM_vs_FF_R2 <- rbind(r_squared_1, r_squared_2)
CAPM_vs_FF_R2 <- as.data.frame(t(CAPM_vs_FF_R2))
CAPM_vs_FF_R2 <- CAPM_vs_FF_R2 %>%
  mutate(Diff = R_Squared_Fama_French - R_Squared_CAPM)
CAPM_vs_FF_R2 <- cbind(Portfolio = rownames(CAPM_vs_FF_R2), CAPM_vs_FF_R2)
rownames(CAPM_vs_FF_R2) <- 1:nrow(CAPM_vs_FF_R2)
CAPM_vs_FF_R2
##     Portfolio R_Squared_CAPM R_Squared_Fama_French       Diff
## 1  SMALL_LoBM      0.5095481             0.6579181 0.14837000
## 2     ME1_BM2      0.5852910             0.8217644 0.23647337
## 3     ME1_BM3      0.6595627             0.8751185 0.21555582
## 4     ME1_BM4      0.6616252             0.9286335 0.26700830
## 5  SMALL_HiBM      0.6206036             0.9288200 0.30821645
## 6     ME2_BM1      0.7208177             0.9100205 0.18920279
## 7     ME2_BM2      0.7655697             0.9315921 0.16602234
## 8     ME2_BM3      0.7828256             0.9337152 0.15088955
## 9     ME2_BM4      0.7616347             0.9526927 0.19105806
## 10    ME2_BM5      0.7189660             0.9511521 0.23218610
## 11    ME3_BM1      0.8127497             0.9285372 0.11578748
## 12    ME3_BM2      0.8660324             0.9303425 0.06431012
## 13    ME3_BM3      0.8511719             0.9266235 0.07545166
## 14    ME3_BM4      0.8077029             0.9321766 0.12447369
## 15    ME3_BM5      0.7488635             0.9232252 0.17436167
## 16    ME4_BM1      0.8722148             0.9325506 0.06033586
## 17    ME4_BM2      0.9056843             0.9225659 0.01688166
## 18    ME4_BM3      0.8686237             0.9142044 0.04558063
## 19    ME4_BM4      0.8223125             0.9160716 0.09375907
## 20    ME4_BM5      0.7578851             0.9130054 0.15512034
## 21   BIG_LoBM      0.9171262             0.9548618 0.03773557
## 22    ME5_BM2      0.9193302             0.9310154 0.01168527
## 23    ME5_BM3      0.8505411             0.9048232 0.05428211
## 24    ME5_BM4      0.8016296             0.9229742 0.12134460
## 25   BIG_HiBM      0.6682738             0.8347205 0.16644664
print(paste0("The average R Squared over the 25 Portfolios is ", round(mean(CAPM_vs_FF_R2$R_Squared_CAPM), digits = 6), " under the CAPM Model"))
## [1] "The average R Squared over the 25 Portfolios is 0.770263 under the CAPM Model"
print('')
## [1] ""
print(paste0("The average R Squared over the 25 Portfolios is ", round(mean(CAPM_vs_FF_R2$R_Squared_Fama_French), digits = 6), " under the Fama French 3 Factor Model"))
## [1] "The average R Squared over the 25 Portfolios is 0.907165 under the Fama French 3 Factor Model"

We can see that for all 25 portfolios formed on book-to-market, the Fama French 3 Factor Model is more accurate than the CAPM model. For example, the Fama French Model accounts for over 30% more variation than the CAPM model does for the portfolio labelled “SMALL_Hi_BM”. The CAPM model is a good rudimentary and versatile model, but it only uses one factor to explain portfolio returns. Additionally, this factor is the difference between the index return and the risk-free rate. Thus, if a given portfolio has similar weights and holdings to the market index, the CAPM model will be able to price it well. However, if a given portfolio has wildly different weights and holdings when compared to the market index, the CAPM model will likely struggle to accurately price it. The plots of mean realized excess returns against the predicted excess returns further supports this conclusion. We can see that the plot corresponding to the Fama French Model does a better job of pricing the portfolios than the CAPM model.

library(reshape)
## Warning: package 'reshape' was built under R version 4.1.2
## 
## Attaching package: 'reshape'
## The following object is masked from 'package:lubridate':
## 
##     stamp
## The following object is masked from 'package:dplyr':
## 
##     rename
## The following objects are masked from 'package:tidyr':
## 
##     expand, smiths
CAPM_vs_FF_R2_plot_prep <- select(CAPM_vs_FF_R2, -c(Diff))
CAPM_vs_FF_R2_melt <- melt(CAPM_vs_FF_R2_plot_prep, "Portfolio")

ggplot(data = CAPM_vs_FF_R2_melt, aes(x = Portfolio, y = value)) +
  geom_bar(aes(fill=variable), stat = "identity", position = position_dodge(width=0.4), width = 0.4) +
  ggtitle("CAPM Vs Fama French") +
  labs(y = "R_Squared") +
                        theme(
                        plot.title = element_text(size = 15, face = "bold", hjust = 0.5, vjust = 2.5),
                        axis.title.x = element_text(size = 12, face = "bold", margin = margin(t = 10)),
                        axis.title.y = element_text(size = 12, face = "bold", margin = margin(r = 10)),
                        axis.text.x = element_text(size = 11, angle=-40, hjust=0.1),
                        axis.text.y = element_text(size = 11),
                        legend.title = element_text(size = 12, face = "bold"),
                        legend.text = element_text(size = 10.5),
                        legend.key.height = unit(1, 'cm'),
                        legend.key.width = unit(1, 'cm'),
                        legend.position = "right", legend.margin=margin(l = 20))+
  guides(fill=guide_legend(title="Legend"))

Part 1 (e): Analysis of Additional Portfolios

Portfolios Formed on Operating Profitability

df_OP <- read.csv("C:\\Users\\zerva\\Documents\\AM14\\Portfolios_Formed_on_OP.csv", header = TRUE, skip = 24)
df_OP <- df_OP[1:702,]

names(df_OP)[names(df_OP) == "X"] <- "Date"
df_OP <- as.data.frame(lapply(df_OP, as.numeric))
df_OP$Date <- parse_date_time(df_OP$Date, "ym")
df_FF_OP <- df_FF[445:1146,]

df_combined_OP <- left_join(df_OP, df_FF_OP, by = "Date")
df_combined_OP <- df_combined_OP %>%
  mutate(MKT_RF_DIFF = Mkt_RF - RF)
df_combined_OP <- df_combined_OP[-c(2:9)]
names(df_combined_OP) <- gsub("\\.", "_", names(df_combined_OP))

head(df_combined_OP)
##         Date Lo_10 Dec_2 Dec_3 Dec_4 Dec_5 Dec_6 Dec_7 Dec_8 Dec_9 Hi_10 Mkt_RF
## 1 1963-07-01 -1.44 -0.19 -1.02  2.19 -0.84  0.92 -0.72 -1.15 -0.31  0.98  -0.39
## 2 1963-08-01  5.53  4.45  6.00  3.55  5.62  5.11  5.57  5.36  5.90  6.07   5.07
## 3 1963-09-01 -3.02 -1.65 -1.15 -2.59  2.21 -1.88 -2.48 -1.34 -2.48 -0.97  -1.57
## 4 1963-10-01  0.77  0.85  0.97 -0.05  3.41  0.84  0.98  3.67  2.92  8.88   2.53
## 5 1963-11-01  0.18 -1.74 -1.81 -0.63  2.05 -0.63 -0.74  1.06  0.30 -3.73  -0.85
## 6 1963-12-01  1.86  0.03  3.47  4.30  0.14  3.34  2.68  2.53  2.60  1.35   1.83
##     SMB   HML   RF MKT_RF_DIFF
## 1 -0.53 -0.89 0.27       -0.66
## 2 -0.89  1.68 0.25        4.82
## 3 -0.35  0.08 0.27       -1.84
## 4 -0.56 -0.14 0.29        2.24
## 5 -1.17  1.81 0.27       -1.12
## 6 -2.11 -0.08 0.29        1.54
r_squared_3 <- c()
realized_return_3 <- c()
pred_return_3 <- c()
reg_3 <- c()

for (i in 2:11) {
  reg_3[[i]] <- summary(lm(as.matrix(df_combined_OP[i]) ~ as.matrix(MKT_RF_DIFF) + as.matrix(SMB) + as.matrix(HML) + as.matrix(RF), data = df_combined_OP))
  pred_return_3[[i]] <- mean(reg_3[[i]]$coefficients[2,1] * df_combined_OP$MKT_RF_DIFF)
  realized_return_3[[i]] <- mean(df_combined_OP[ , i] - df_combined_OP$RF)
  r_squared_3[[i]] <- reg_3[[i]]$adj.r.squared
}
pred_return_3 <- as.data.frame(as.matrix(pred_return_3))
pred_return_3 <- pred_return_3[-c(1),]
pred_return_3 <- as.data.frame(as.list(pred_return_3), col.names = names(df_combined_OP[2:11]), row.names = "Predicted_Returns")
realized_return_3 <- as.data.frame(as.matrix(realized_return_3))
realized_return_3 <- realized_return_3[-c(1),]
realized_return_3 <- as.data.frame(as.list(realized_return_3), col.names = names(df_combined_OP[2:11]), row.names = "Realized_Returns")
realized_vs_pred_return_3 <- rbind(pred_return_3, realized_return_3)
realized_vs_pred_return_3 <- as.data.frame(t(realized_vs_pred_return_3))
realized_vs_pred_return_3 <- realized_vs_pred_return_3 %>%
  add_column(Group = c('A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J'), .after="Realized_Returns")
realized_vs_pred_return_3[,1:2]
##       Predicted_Returns Realized_Returns
## Lo_10         0.2619437        0.4304843
## Dec_2         0.2373854        0.4591168
## Dec_3         0.2341771        0.4872222
## Dec_4         0.2196764        0.5520513
## Dec_5         0.2240728        0.6761823
## Dec_6         0.2235734        0.5732906
## Dec_7         0.2194115        0.5531339
## Dec_8         0.2228889        0.6916382
## Dec_9         0.2165977        0.7410399
## Hi_10         0.2174477        0.6822507
ggplot(data = realized_vs_pred_return_3, aes(x = Predicted_Returns, y = Realized_Returns, label = rownames(realized_vs_pred_return_3))) +
  geom_point(size = 2) +
  geom_label_repel(aes(fill = factor(Group)), color = 'white', fontface = 'bold', min.segment.length = 0, segment.color = 'black', show.legend = FALSE) +
  ggtitle("Predicted Vs. Realized Excess Returns") +
  theme(
    plot.title = element_text(size = 22, face = "bold", hjust = 0.5, vjust = 2.5),
    axis.title.x = element_text(size = 14, face = "bold", margin = margin(t = 15)),
    axis.title.y = element_text(size = 14, face = "bold", margin = margin(r = 15)),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11)
    ) +
  geom_abline() +
  scale_x_continuous(breaks = scales::pretty_breaks(n = 20), limits = c(0.0, 0.30)) +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 15), limits = c(0.00, 0.8)) +
  labs(y = "Realized Returns", x = "Predicted Returns") 

r_squared_3 <- as.data.frame(as.matrix(r_squared_3))
r_squared_3 <- r_squared_3[-c(1),]
r_squared_3 <- as.data.frame(as.list(r_squared_3), col.names = names(df_combined_OP[2:11]), row.names = "R_Squared")
r_squared_3 <- as.data.frame(t(r_squared_3))
r_squared_3 <- cbind(Operating_Profitability_Portfolios = rownames(r_squared_3), r_squared_3)
rownames(r_squared_3) <- 1:nrow(r_squared_3)
r_squared_3
##    Operating_Profitability_Portfolios R_Squared
## 1                               Lo_10 0.8597687
## 2                               Dec_2 0.8754843
## 3                               Dec_3 0.8973700
## 4                               Dec_4 0.8939074
## 5                               Dec_5 0.8912305
## 6                               Dec_6 0.9121761
## 7                               Dec_7 0.9209898
## 8                               Dec_8 0.9201417
## 9                               Dec_9 0.9137266
## 10                              Hi_10 0.8771008
print(paste0("The average R Squared over the 10 Portfolios is ", round(mean(r_squared_3$R_Squared), digits = 6), " under the Fama French 3 Factor Model"))
## [1] "The average R Squared over the 10 Portfolios is 0.89619 under the Fama French 3 Factor Model"

Portfolios Formed on Investment

df_INV <- read.csv("C:\\Users\\zerva\\Documents\\AM14\\Portfolios_Formed_on_INV.csv", header = TRUE, skip = 17)
df_INV <- df_INV[1:702,]

names(df_INV)[names(df_INV) == "X"] <- "Date"
df_INV <- as.data.frame(lapply(df_INV, as.numeric))
df_INV$Date <- parse_date_time(df_INV$Date, "ym")
df_combined_INV <- left_join(df_INV, df_FF_OP, by = "Date")
df_combined_INV <- df_combined_INV %>%
  mutate(MKT_RF_DIFF = Mkt_RF - RF)
df_combined_INV <- df_combined_INV[-c(2:9)]
names(df_combined_INV) <- gsub("\\.", "_", names(df_combined_INV))

head(df_combined_INV)
##         Date Lo_10 Dec_2 Dec_3 Dec_4 Dec_5 Dec_6 Dec_7 Dec_8 Dec_9 Hi_10 Mkt_RF
## 1 1963-07-01 -2.34 -0.69 -0.12 -0.10  1.04  0.25  0.25  0.30  0.49  0.73  -0.39
## 2 1963-08-01  5.40  6.90  5.34  5.36  5.64  5.30  4.54  4.51  6.05  7.72   5.07
## 3 1963-09-01 -0.83 -0.37 -3.21 -0.32 -2.76 -2.18 -0.26 -0.80 -1.15 -1.85  -1.57
## 4 1963-10-01  2.91  2.37  1.41  0.66  0.95  1.61  2.41  4.41  8.41  4.38   2.53
## 5 1963-11-01  1.85 -0.62 -0.40 -1.53 -1.80 -1.11  1.77 -0.40 -4.39 -0.64  -0.85
## 6 1963-12-01  2.19  2.26  1.83  3.82  1.84  2.38  1.43  3.86  1.13  1.06   1.83
##     SMB   HML   RF MKT_RF_DIFF
## 1 -0.53 -0.89 0.27       -0.66
## 2 -0.89  1.68 0.25        4.82
## 3 -0.35  0.08 0.27       -1.84
## 4 -0.56 -0.14 0.29        2.24
## 5 -1.17  1.81 0.27       -1.12
## 6 -2.11 -0.08 0.29        1.54
r_squared_4 <- c()
realized_return_4 <- c()
pred_return_4 <- c()
reg_4 <- c()

for (i in 2:11) {
  reg_4[[i]] <- summary(lm(as.matrix(df_combined_INV[i]) ~ as.matrix(MKT_RF_DIFF) + as.matrix(SMB) + as.matrix(HML) + as.matrix(RF), data = df_combined_INV))
  pred_return_4[[i]] <- mean(reg_4[[i]]$coefficients[2,1] * df_combined_INV$MKT_RF_DIFF)
  realized_return_4[[i]] <- mean(df_combined_INV[ , i] - df_combined_INV$RF)
  r_squared_4[[i]] <- reg_4[[i]]$adj.r.squared
}
pred_return_4 <- as.data.frame(as.matrix(pred_return_4))
pred_return_4 <- pred_return_4[-c(1),]
pred_return_4 <- as.data.frame(as.list(pred_return_4), col.names = names(df_combined_INV[2:11]), row.names = "Predicted_Returns")
realized_return_4 <- as.data.frame(as.matrix(realized_return_4))
realized_return_4 <- realized_return_4[-c(1),]
realized_return_4 <- as.data.frame(as.list(realized_return_4), col.names = names(df_combined_INV[2:11]), row.names = "Realized_Returns")
realized_vs_pred_return_4 <- rbind(pred_return_4, realized_return_4)
realized_vs_pred_return_4 <- as.data.frame(t(realized_vs_pred_return_4))
realized_vs_pred_return_4 <- realized_vs_pred_return_4 %>%
  add_column(Group = c('A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J'), .after="Realized_Returns")
realized_vs_pred_return_4[,1:2]
##       Predicted_Returns Realized_Returns
## Lo_10         0.2322115        0.7944017
## Dec_2         0.2212424        0.8185897
## Dec_3         0.2101821        0.7330057
## Dec_4         0.2057983        0.6099003
## Dec_5         0.2061364        0.6184330
## Dec_6         0.2154027        0.5964387
## Dec_7         0.2162181        0.6116952
## Dec_8         0.2261915        0.6499145
## Dec_9         0.2413185        0.6731909
## Hi_10         0.2649270        0.4253134
ggplot(data = realized_vs_pred_return_4, aes(x = Predicted_Returns, y = Realized_Returns, label = rownames(realized_vs_pred_return_4))) +
  geom_point(size = 2) +
  geom_label_repel(aes(fill = factor(Group)), color = 'white', fontface = 'bold', min.segment.length = 0, segment.color = 'black', show.legend = FALSE) +
  ggtitle("Predicted Vs. Realized Excess Returns") +
  theme(
    plot.title = element_text(size = 22, face = "bold", hjust = 0.5, vjust = 2.5),
    axis.title.x = element_text(size = 14, face = "bold", margin = margin(t = 15)),
    axis.title.y = element_text(size = 14, face = "bold", margin = margin(r = 15)),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11)
    ) +
  geom_abline() +
  scale_x_continuous(breaks = scales::pretty_breaks(n = 20), limits = c(0.0, 0.30)) +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 15), limits = c(0.00, 0.9)) +
  labs(y = "Realized Returns", x = "Predicted Returns") 

r_squared_4 <- as.data.frame(as.matrix(r_squared_4))
r_squared_4 <- r_squared_4[-c(1),]
r_squared_4 <- as.data.frame(as.list(r_squared_4), col.names = names(df_combined_INV[2:11]), row.names = "R_Squared")
r_squared_4 <- as.data.frame(t(r_squared_4))
r_squared_4 <- cbind(Investment_Portfolios = rownames(r_squared_4), r_squared_4)
rownames(r_squared_4) <- 1:nrow(r_squared_4)
r_squared_4
##    Investment_Portfolios R_Squared
## 1                  Lo_10 0.8606610
## 2                  Dec_2 0.8578449
## 3                  Dec_3 0.8650878
## 4                  Dec_4 0.8870272
## 5                  Dec_5 0.9013214
## 6                  Dec_6 0.9109013
## 7                  Dec_7 0.9018029
## 8                  Dec_8 0.9171260
## 9                  Dec_9 0.9028660
## 10                 Hi_10 0.9253423
print(paste0("The average R Squared over the 10 Portfolios is ", round(mean(r_squared_4$R_Squared), digits = 6), " under the Fama French 3 Factor Model"))
## [1] "The average R Squared over the 10 Portfolios is 0.892998 under the Fama French 3 Factor Model"

Portfolios Formed on Dividend Yield

df_DY <- read.csv("C:\\Users\\zerva\\Documents\\AM14\\Portfolios_Formed_on_D-P.csv", header = TRUE, skip = 19)
df_DY <- df_DY[1:1134,]

names(df_DY)[names(df_DY) == "X"] <- "Date"
df_DY <- as.data.frame(lapply(df_DY, as.numeric))
df_DY$Date <- parse_date_time(df_DY$Date, "ym")
df_FF_DY <- df_FF[13:1146,]

df_combined_DY <- left_join(df_DY, df_FF_DY, by = "Date")
df_combined_DY <- df_combined_DY %>%
  mutate(MKT_RF_DIFF = Mkt_RF - RF)
df_combined_DY <- df_combined_DY[-c(2:10)]
names(df_combined_DY) <- gsub("\\.", "_", names(df_combined_DY))

head(df_combined_DY)
##         Date Lo_10 Dec_2 Dec_3 Dec_4 Dec_5 Dec_6 Dec_7 Dec_8 Dec_9 Hi_10 Mkt_RF
## 1 1927-07-01 11.50  6.00  7.68  8.79  5.56  9.26 12.38  6.88  5.65  4.02   7.26
## 2 1927-08-01  5.44  3.40  0.88  2.59 -0.12  2.21  5.83  2.44  3.30 -2.31   1.97
## 3 1927-09-01  1.84  6.61  4.01  5.20  4.47  5.72  5.67  3.88  3.13  3.50   4.76
## 4 1927-10-01 -6.51 -3.75 -5.24 -5.62 -2.12 -2.88 -3.47 -3.27 -0.98 -4.13  -4.31
## 5 1927-11-01  6.98  6.59  6.92  9.68  4.18  7.01  5.48  7.67  8.14 11.18   6.58
## 6 1927-12-01  2.05  1.40  1.67  2.85  0.76  0.06  6.30  5.21  8.68  0.55   2.09
##     SMB   HML   RF MKT_RF_DIFF
## 1 -3.25 -1.14 0.30        6.96
## 2 -0.69 -3.74 0.28        1.69
## 3 -3.63 -0.63 0.21        4.55
## 4  2.12 -4.33 0.25       -4.56
## 5  2.72 -0.27 0.21        6.37
## 6  0.97 -1.13 0.22        1.87
r_squared_5 <- c()
realized_return_5 <- c()
pred_return_5 <- c()
reg_5 <- c()

for (i in 2:11) {
  reg_5[[i]] <- summary(lm(as.matrix(df_combined_DY[i]) ~ as.matrix(MKT_RF_DIFF) + as.matrix(SMB) + as.matrix(HML) + as.matrix(RF), data = df_combined_DY))
  pred_return_5[[i]] <- mean(reg_5[[i]]$coefficients[2,1] * df_combined_DY$MKT_RF_DIFF)
  realized_return_5[[i]] <- mean(df_combined_DY[ , i] - df_combined_DY$RF)
  r_squared_5[[i]] <- reg_5[[i]]$adj.r.squared
}
pred_return_5 <- as.data.frame(as.matrix(pred_return_5))
pred_return_5 <- pred_return_5[-c(1),]
pred_return_5 <- as.data.frame(as.list(pred_return_5), col.names = names(df_combined_DY[2:11]), row.names = "Predicted_Returns")
realized_return_5 <- as.data.frame(as.matrix(realized_return_5))
realized_return_5 <- realized_return_5[-c(1),]
realized_return_5 <- as.data.frame(as.list(realized_return_5), col.names = names(df_combined_DY[2:11]), row.names = "Realized_Returns")
realized_vs_pred_return_5 <- rbind(pred_return_5, realized_return_5)
realized_vs_pred_return_5 <- as.data.frame(t(realized_vs_pred_return_5))
realized_vs_pred_return_5 <- realized_vs_pred_return_5 %>%
  add_column(Group = c('A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J'), .after="Realized_Returns")
realized_vs_pred_return_5[,1:2]
##       Predicted_Returns Realized_Returns
## Lo_10         0.4789129        0.6484568
## Dec_2         0.4363415        0.7116578
## Dec_3         0.4216964        0.6883157
## Dec_4         0.4020086        0.7709788
## Dec_5         0.4089952        0.6183598
## Dec_6         0.3920431        0.7398060
## Dec_7         0.3837628        0.7935802
## Dec_8         0.3747815        0.8046825
## Dec_9         0.3793918        0.7846825
## Hi_10         0.3621550        0.7834656
ggplot(data = realized_vs_pred_return_5, aes(x = Predicted_Returns, y = Realized_Returns, label = rownames(realized_vs_pred_return_5))) +
  geom_point(size = 2) +
  geom_label_repel(aes(fill = factor(Group)), color = 'white', fontface = 'bold', min.segment.length = 0, segment.color = 'black', show.legend = FALSE) +
  ggtitle("Predicted Vs. Realized Excess Returns") +
  theme(
    plot.title = element_text(size = 22, face = "bold", hjust = 0.5, vjust = 2.5),
    axis.title.x = element_text(size = 14, face = "bold", margin = margin(t = 15)),
    axis.title.y = element_text(size = 14, face = "bold", margin = margin(r = 15)),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11)
    ) +
  geom_abline() +
  scale_x_continuous(breaks = scales::pretty_breaks(n = 20), limits = c(0.0, 0.55)) +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 15), limits = c(0.00, 0.9)) +
  labs(y = "Realized Returns", x = "Predicted Returns") 

r_squared_5 <- as.data.frame(as.matrix(r_squared_5))
r_squared_5 <- r_squared_5[-c(1),]
r_squared_5 <- as.data.frame(as.list(r_squared_5), col.names = names(df_combined_DY[2:11]), row.names = "R_Squared")
r_squared_5 <- as.data.frame(t(r_squared_5))
r_squared_5 <- cbind(Dividend_Yield_Portfolios = rownames(r_squared_5), r_squared_5)
rownames(r_squared_5) <- 1:nrow(r_squared_5)
r_squared_5
##    Dividend_Yield_Portfolios R_Squared
## 1                      Lo_10 0.8817372
## 2                      Dec_2 0.9036159
## 3                      Dec_3 0.8886744
## 4                      Dec_4 0.8954145
## 5                      Dec_5 0.8731314
## 6                      Dec_6 0.8661112
## 7                      Dec_7 0.8620661
## 8                      Dec_8 0.8516656
## 9                      Dec_9 0.8290817
## 10                     Hi_10 0.7978232
print(paste0("The average R Squared over the 10 Portfolios is ", round(mean(r_squared_5$R_Squared), digits = 6), " under the Fama French 3 Factor Model"))
## [1] "The average R Squared over the 10 Portfolios is 0.864932 under the Fama French 3 Factor Model"

Portfolios Formed on Momentum

df_MM <- read.csv("C:\\Users\\zerva\\Documents\\AM14\\10_Portfolios_Prior_12_2.csv", header = TRUE, skip = 10)
df_MM <- df_MM[1:1140,]

names(df_MM)[names(df_MM) == "X"] <- "Date"
df_MM <- as.data.frame(lapply(df_MM, as.numeric))
df_MM$Date <- parse_date_time(df_MM$Date, "ym")
df_FF_MM <- df_FF[7:1146,]

df_combined_MM <- left_join(df_MM, df_FF_MM, by = "Date")
df_combined_MM <- df_combined_MM %>%
  mutate(MKT_RF_DIFF = Mkt_RF - RF)
names(df_combined_MM) <- gsub("\\.", "_", names(df_combined_MM))

head(df_combined_MM)
##         Date Lo_PRIOR PRIOR_2 PRIOR_3 PRIOR_4 PRIOR_5 PRIOR_6 PRIOR_7 PRIOR_8
## 1 1927-01-01    -3.32   -4.46    2.69   -0.33   -0.41    0.97    0.74    0.36
## 2 1927-02-01     7.53    5.93    8.25    7.27    3.22    4.21    2.83    3.26
## 3 1927-03-01    -3.23   -3.01   -3.92   -4.77   -0.48   -2.42    2.05    0.41
## 4 1927-04-01     2.04   -3.20   -2.47   -1.37    2.20   -0.05    2.05   -0.59
## 5 1927-05-01     2.72    4.57    5.95    3.18    6.36    5.80    4.96    6.81
## 6 1927-06-01    -6.44   -3.51   -3.13   -2.62   -2.10   -1.91   -1.92   -1.64
##   PRIOR_9 Hi_PRIOR Mkt_RF   SMB   HML   RF MKT_RF_DIFF
## 1   -0.41    -0.24  -0.06 -0.51  4.73 0.25       -0.31
## 2    4.20     7.01   4.18 -0.25  3.27 0.26        3.92
## 3    0.96     5.47   0.13 -1.89 -2.56 0.30       -0.17
## 4    1.59     5.49   0.46  0.49  0.71 0.25        0.21
## 5    8.11     6.32   5.44  1.46  4.98 0.30        5.14
## 6   -2.37    -1.58  -2.34  0.51 -1.53 0.26       -2.60
r_squared_6 <- c()
realized_return_6 <- c()
pred_return_6 <- c()
reg_6 <- c()

for (i in 2:11) {
  reg_6[[i]] <- summary(lm(as.matrix(df_combined_MM[i]) ~ as.matrix(MKT_RF_DIFF) + as.matrix(SMB) + as.matrix(HML) + as.matrix(RF), data = df_combined_MM))
  pred_return_6[[i]] <- mean(reg_6[[i]]$coefficients[2,1] * df_combined_MM$MKT_RF_DIFF)
  realized_return_6[[i]] <- mean(df_combined_MM[ , i] - df_combined_MM$RF)
  r_squared_6[[i]] <- reg_6[[i]]$adj.r.squared
}
pred_return_6 <- as.data.frame(as.matrix(pred_return_6))
pred_return_6 <- pred_return_6[-c(1),]
pred_return_6 <- as.data.frame(as.list(pred_return_6), col.names = names(df_combined_MM[2:11]), row.names = "Predicted_Returns")
realized_return_6 <- as.data.frame(as.matrix(realized_return_6))
realized_return_6 <- realized_return_6[-c(1),]
realized_return_6 <- as.data.frame(as.list(realized_return_6), col.names = names(df_combined_MM[2:11]), row.names = "Realized_Returns")
realized_vs_pred_return_6 <- rbind(pred_return_6, realized_return_6)
realized_vs_pred_return_6 <- as.data.frame(t(realized_vs_pred_return_6))
realized_vs_pred_return_6 <- realized_vs_pred_return_6 %>%
  add_column(Group = c('A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J'), .after="Realized_Returns")
realized_vs_pred_return_6[,1:2]
##          Predicted_Returns Realized_Returns
## Lo_PRIOR         0.5913294        0.1015000
## PRIOR_2          0.5291004        0.4910965
## PRIOR_3          0.4742153        0.5092544
## PRIOR_4          0.4497942        0.6647719
## PRIOR_5          0.4271412        0.6474298
## PRIOR_6          0.4287309        0.7110789
## PRIOR_7          0.4129910        0.7688860
## PRIOR_8          0.3990540        0.8615088
## PRIOR_9          0.4125182        0.9400789
## Hi_PRIOR         0.4293843        1.2398596
ggplot(data = realized_vs_pred_return_6, aes(x = Predicted_Returns, y = Realized_Returns, label = rownames(realized_vs_pred_return_6))) +
  geom_point(size = 2) +
  geom_label_repel(aes(fill = factor(Group)), color = 'white', fontface = 'bold', min.segment.length = 0, segment.color = 'black', show.legend = FALSE) +
  ggtitle("Predicted Vs. Realized Excess Returns") +
  theme(
    plot.title = element_text(size = 22, face = "bold", hjust = 0.5, vjust = 2.5),
    axis.title.x = element_text(size = 14, face = "bold", margin = margin(t = 15)),
    axis.title.y = element_text(size = 14, face = "bold", margin = margin(r = 15)),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11)
    ) +
  geom_abline() +
  scale_x_continuous(breaks = scales::pretty_breaks(n = 20), limits = c(0.0, 0.6)) +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 15), limits = c(0.00, 1.3)) +
  labs(y = "Realized Returns", x = "Predicted Returns") 

r_squared_6 <- as.data.frame(as.matrix(r_squared_6))
r_squared_6 <- r_squared_6[-c(1),]
r_squared_6 <- as.data.frame(as.list(r_squared_6), col.names = names(df_combined_MM[2:11]), row.names = "R_Squared")
r_squared_6 <- as.data.frame(t(r_squared_6))
r_squared_6 <- cbind(Momentum_Portfolios = rownames(r_squared_6), r_squared_6)
rownames(r_squared_6) <- 1:nrow(r_squared_6)
r_squared_6
##    Momentum_Portfolios R_Squared
## 1             Lo_PRIOR 0.7735194
## 2              PRIOR_2 0.8177860
## 3              PRIOR_3 0.8444302
## 4              PRIOR_4 0.8743757
## 5              PRIOR_5 0.8961707
## 6              PRIOR_6 0.9207952
## 7              PRIOR_7 0.9010999
## 8              PRIOR_8 0.8821015
## 9              PRIOR_9 0.8522983
## 10            Hi_PRIOR 0.7627175
print(paste0("The average R Squared over the 10 Portfolios is ", round(mean(r_squared_6$R_Squared), digits = 6), " under the Fama French 3 Factor Model"))
## [1] "The average R Squared over the 10 Portfolios is 0.852529 under the Fama French 3 Factor Model"

17 Industry Portfolios

df_17 <- read.csv("C:\\Users\\zerva\\Documents\\AM14\\17_Industry_Portfolios.csv", header = TRUE, skip = 11)
df_17 <- df_17[1:1146,]

names(df_17)[names(df_17) == "X"] <- "Date"
df_17 <- as.data.frame(lapply(df_17, as.numeric))
df_17$Date <- parse_date_time(df_17$Date, "ym")
df_FF_17 <- df_FF[1:1146,]

df_combined_17 <- left_join(df_17, df_FF_17, by = "Date")
df_combined_17 <- df_combined_17 %>%
  mutate(MKT_RF_DIFF = Mkt_RF - RF)

head(df_combined_17)
##         Date  Food Mines   Oil Clths Durbl Chems Cnsum Cnstr Steel FabPr Machn
## 1 1926-07-01  0.48  3.78 -1.41  6.02 -1.62  8.46  1.42  2.31  4.07  8.77  3.79
## 2 1926-08-01  2.91  0.69  3.60  0.15 -1.96  5.70  5.84  4.33  2.17 -5.56  2.35
## 3 1926-09-01  1.20  1.10 -3.68  0.26  0.24  5.48  1.21 -0.06  0.15 -4.13 -0.65
## 4 1926-10-01 -3.06 -0.79 -1.02  0.37 -6.07 -4.76  0.69 -4.79 -3.85 -5.13 -3.29
## 5 1926-11-01  6.37  4.38 -0.01  2.22 -1.95  5.27  4.63  2.45  3.86  3.57  4.54
## 6 1926-12-01 -0.57  1.66  2.92  2.17  3.79  5.20  2.27  4.30  3.75 -3.85  1.29
##    Cars Trans Utils Rtail Finan Other Mkt_RF   SMB   HML   RF MKT_RF_DIFF
## 1 17.43  1.83  7.04  0.13 -0.02  1.22   2.96 -2.38 -2.73 0.22        2.74
## 2  3.96  4.54 -1.69 -0.68  4.47  3.11   2.64 -1.47  4.14 0.25        2.39
## 3  5.57  0.32  2.04  0.21 -1.61  1.82   0.36 -1.39  0.12 0.23        0.13
## 4 -8.29 -2.90 -2.63 -2.26 -5.51 -0.88  -3.24 -0.13  0.65 0.32       -3.56
## 5 -0.28  2.19  3.71  6.44  2.34  1.38   2.53 -0.16 -0.38 0.31        2.22
## 6 10.80  3.56 -0.17  0.61  2.60  1.82   2.62 -0.02  0.00 0.28        2.34
r_squared_7 <- c()
realized_return_7 <- c()
pred_return_7 <- c()
reg_7 <- c()

for (i in 2:18) {
  reg_7[[i]] <- summary(lm(as.matrix(df_combined_17[i]) ~ as.matrix(MKT_RF_DIFF) + as.matrix(SMB) + as.matrix(HML) + as.matrix(RF), data = df_combined_17))
  pred_return_7[[i]] <- mean(reg_7[[i]]$coefficients[2,1] * df_combined_17$MKT_RF_DIFF)
  realized_return_7[[i]] <- mean(df_combined_17[ , i] - df_combined_17$RF)
  r_squared_7[[i]] <- reg_7[[i]]$adj.r.squared
}
pred_return_7 <- as.data.frame(as.matrix(pred_return_7))
pred_return_7 <- pred_return_7[-c(1),]
pred_return_7 <- as.data.frame(as.list(pred_return_7), col.names = names(df_combined_17[2:18]), row.names = "Predicted_Returns")
realized_return_7 <- as.data.frame(as.matrix(realized_return_7))
realized_return_7 <- realized_return_7[-c(1),]
realized_return_7 <- as.data.frame(as.list(realized_return_7), col.names = names(df_combined_17[2:18]), row.names = "Realized_Returns")
realized_vs_pred_return_7 <- rbind(pred_return_7, realized_return_7)
realized_vs_pred_return_7 <- as.data.frame(t(realized_vs_pred_return_7))
realized_vs_pred_return_7
##       Predicted_Returns Realized_Returns
## Food          0.3276799        0.7159773
## Mines         0.3789590        0.7025567
## Oil           0.3774229        0.7406545
## Clths         0.3531473        0.7065532
## Durbl         0.4877504        0.6821990
## Chems         0.4548016        0.7823822
## Cnsum         0.3329988        0.7650262
## Cnstr         0.4760653        0.7816754
## Steel         0.5339467        0.6712042
## FabPr         0.4029743        0.7143019
## Machn         0.5184376        0.8710558
## Cars          0.5067694        0.9632286
## Trans         0.4446450        0.7072949
## Utils         0.3222166        0.6166579
## Rtail         0.4113723        0.7808115
## Finan         0.4788713        0.7584817
## Other         0.3863271        0.6735166
ggplot(data = realized_vs_pred_return_7, aes(x = Predicted_Returns, y = Realized_Returns, label = rownames(realized_vs_pred_return_7))) +
  geom_point(size = 2) +
  geom_label_repel(fontface = 'bold', min.segment.length = 0, segment.color = 'black') +
  ggtitle("Predicted Vs. Realized Excess Returns") +
  theme(
    plot.title = element_text(size = 22, face = "bold", hjust = 0.5, vjust = 2.5),
    axis.title.x = element_text(size = 14, face = "bold", margin = margin(t = 15)),
    axis.title.y = element_text(size = 14, face = "bold", margin = margin(r = 15)),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11)
    ) +
  geom_abline() +
  scale_x_continuous(breaks = scales::pretty_breaks(n = 20), limits = c(0.0, 0.55)) +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 15), limits = c(0.00, 1.1)) +
  labs(y = "Realized Returns", x = "Predicted Returns") 

r_squared_7 <- as.data.frame(as.matrix(r_squared_7))
r_squared_7 <- r_squared_7[-c(1),]
r_squared_7 <- as.data.frame(as.list(r_squared_7), col.names = names(df_combined_17[2:18]), row.names = "R_Squared")
r_squared_7 <- as.data.frame(t(r_squared_7))
r_squared_7 <- cbind(Industry_Portfolios = rownames(r_squared_7), r_squared_7)
rownames(r_squared_7) <- 1:nrow(r_squared_7)
r_squared_7
##    Industry_Portfolios R_Squared
## 1                 Food 0.7220694
## 2                Mines 0.5389444
## 3                  Oil 0.5837083
## 4                Clths 0.6739104
## 5                Durbl 0.8053966
## 6                Chems 0.7706768
## 7                Cnsum 0.6654104
## 8                Cnstr 0.8403978
## 9                Steel 0.7567506
## 10               FabPr 0.7423396
## 11               Machn 0.8598135
## 12                Cars 0.6958938
## 13               Trans 0.8272660
## 14               Utils 0.5893746
## 15               Rtail 0.7541838
## 16               Finan 0.8643262
## 17               Other 0.8993971
print(paste0("The average R Squared over the 17 Industry Portfolios is ", round(mean(r_squared_7$R_Squared), digits = 6), " under the Fama French 3 Factor Model"))
## [1] "The average R Squared over the 17 Industry Portfolios is 0.74058 under the Fama French 3 Factor Model"

30 Industry Portfolios

df_30 <- read.csv("C:\\Users\\zerva\\Documents\\AM14\\30_Industry_Portfolios.csv", header = TRUE, skip = 11)
df_30 <- df_30[1:1146,]

names(df_30)[names(df_30) == "X"] <- "Date"
df_30 <- as.data.frame(lapply(df_30, as.numeric))
df_30$Date <- parse_date_time(df_30$Date, "ym")
df_FF_30 <- df_FF[1:1146,]

df_combined_30 <- left_join(df_30, df_FF_30, by = "Date")
df_combined_30 <- df_combined_30 %>%
  mutate(MKT_RF_DIFF = Mkt_RF - RF)

head(df_combined_30)
##         Date  Food  Beer Smoke Games Books Hshld Clths  Hlth Chems Txtls Cnstr
## 1 1926-07-01  0.56 -5.19  1.29  2.93 10.97 -0.48  8.08  1.77  8.14  0.39  2.07
## 2 1926-08-01  2.59 27.03  6.50  0.55 10.01 -3.58 -2.51  4.25  5.50  7.97  4.72
## 3 1926-09-01  1.16  4.02  1.26  6.58 -0.99  0.73 -0.51  0.69  5.33  2.30 -0.50
## 4 1926-10-01 -3.06 -3.31  1.06 -4.76  9.47 -4.68  0.12 -0.57 -4.76  1.00 -4.55
## 5 1926-11-01  6.35  7.29  4.55  1.66 -5.80 -0.54  1.87  5.42  5.20  3.10  2.20
## 6 1926-12-01 -0.51 -4.09  2.55  2.17  0.53  2.56  0.60  0.11  5.37  6.39  3.52
##   Steel FabPr ElcEq Autos Carry Mines  Coal   Oil  Util Telcm Servs BusEq Paper
## 1  4.07  5.43  3.18 16.39  1.02  5.64  1.54 -1.40  7.04  0.83  9.22  2.06  7.70
## 2  2.17  2.06  2.10  4.23  1.66  0.55  0.85  3.69 -1.69  2.17  2.02  4.39 -2.38
## 3  0.15  0.36 -0.56  4.83  2.73  1.74  0.30 -3.69  2.04  2.41  2.25  0.19 -5.54
## 4 -3.85  1.11 -5.73 -7.93 -5.56 -3.20  2.23 -1.04 -2.63 -0.11 -2.00 -1.09 -5.08
## 5  3.86  3.18  5.08 -0.66  7.87  8.46 -0.48  0.06  3.71  1.63  3.77  3.64  3.84
## 6  3.75  5.02 -2.47 10.49  2.48  1.83  1.43  2.94 -0.17  1.99  6.21  7.24 -4.63
##   Trans  Whlsl Rtail Meals   Fin Other Mkt_RF   SMB   HML   RF MKT_RF_DIFF
## 1  1.91 -23.79  0.07  1.87 -0.02  5.20   2.96 -2.38 -2.73 0.22        2.74
## 2  4.85   5.39 -0.75 -0.13  4.47  6.76   2.64 -1.47  4.14 0.25        2.39
## 3  0.07  -7.87  0.25 -0.56 -1.61 -3.86   0.36 -1.39  0.12 0.23        0.13
## 4 -2.61 -15.38 -2.20 -4.11 -5.51 -8.49  -3.24 -0.13  0.65 0.32       -3.56
## 5  1.61   4.67  6.52  4.33  2.34  4.00   2.53 -0.16 -0.38 0.31        2.22
## 6  3.68   9.65  0.57  1.51  2.60 -2.34   2.62 -0.02  0.00 0.28        2.34
r_squared_8 <- c()
realized_return_8 <- c()
pred_return_8 <- c()
reg_8 <- c()

for (i in 2:31) {
  reg_8[[i]] <- summary(lm(as.matrix(df_combined_30[i]) ~ as.matrix(MKT_RF_DIFF) + as.matrix(SMB) + as.matrix(HML) + as.matrix(RF), data = df_combined_30))
  pred_return_8[[i]] <- mean(reg_8[[i]]$coefficients[2,1] * df_combined_30$MKT_RF_DIFF)
  realized_return_8[[i]] <- mean(df_combined_30[ , i] - df_combined_30$RF)
  r_squared_8[[i]] <- reg_8[[i]]$adj.r.squared
}
pred_return_8 <- as.data.frame(as.matrix(pred_return_8))
pred_return_8 <- pred_return_8[-c(1),]
pred_return_8 <- as.data.frame(as.list(pred_return_8), col.names = names(df_combined_30[2:31]), row.names = "Predicted_Returns")
realized_return_8 <- as.data.frame(as.matrix(realized_return_8))
realized_return_8 <- realized_return_8[-c(1),]
realized_return_8 <- as.data.frame(as.list(realized_return_8), col.names = names(df_combined_30[2:31]), row.names = "Realized_Returns")
realized_vs_pred_return_8 <- rbind(pred_return_8, realized_return_8)
realized_vs_pred_return_8 <- as.data.frame(t(realized_vs_pred_return_8))
realized_vs_pred_return_8
##       Predicted_Returns Realized_Returns
## Food          0.3222931        0.7000611
## Beer          0.3719956        0.9355323
## Smoke         0.2813273        0.8705585
## Games         0.5448740        0.8883944
## Books         0.4313819        0.6579930
## Hshld         0.3887106        0.6785079
## Clths         0.3220425        0.7021030
## Hlth          0.3762053        0.8270593
## Chems         0.4548752        0.7954014
## Txtls         0.4186150        0.7131326
## Cnstr         0.4752720        0.7184293
## Steel         0.5339699        0.6712042
## FabPr         0.4976994        0.8250698
## ElcEq         0.5505322        0.9230279
## Autos         0.5232395        0.9452182
## Carry         0.4694880        0.8582286
## Mines         0.3561400        0.6663700
## Coal          0.4681152        0.7653403
## Oil           0.3764917        0.7382112
## Util          0.3222166        0.6166579
## Telcm         0.2976187        0.5851134
## Servs         0.3541885        0.9882810
## BusEq         0.4765955        0.9277138
## Paper         0.4063406        0.7433857
## Trans         0.4403819        0.6750349
## Whlsl         0.4166664        0.5800000
## Rtail         0.4157324        0.8014834
## Meals         0.3815996        0.8206370
## Fin           0.4788226        0.7587435
## Other         0.4261006        0.5303229
ggplot(data = realized_vs_pred_return_8, aes(x = Predicted_Returns, y = Realized_Returns, label = rownames(realized_vs_pred_return_8))) +
  geom_point(size = 2) +
  geom_label_repel(fontface = 'bold', min.segment.length = 0, segment.color = 'black') +
  ggtitle("Predicted Vs. Realized Excess Returns") +
  theme(
    plot.title = element_text(size = 22, face = "bold", hjust = 0.5, vjust = 2.5),
    axis.title.x = element_text(size = 14, face = "bold", margin = margin(t = 15)),
    axis.title.y = element_text(size = 14, face = "bold", margin = margin(r = 15)),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11)
    ) +
  geom_abline() +
  scale_x_continuous(breaks = scales::pretty_breaks(n = 20), limits = c(0.0, 0.6)) +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 15), limits = c(0.00, 1.1)) +
  labs(y = "Realized Returns", x = "Predicted Returns") 

r_squared_8 <- as.data.frame(as.matrix(r_squared_8))
r_squared_8 <- r_squared_8[-c(1),]
r_squared_8 <- as.data.frame(as.list(r_squared_8), col.names = names(df_combined_30[2:31]), row.names = "R_Squared")
r_squared_8 <- as.data.frame(t(r_squared_8))
r_squared_8 <- cbind(Industry_Portfolios = rownames(r_squared_8), r_squared_8)
rownames(r_squared_8) <- 1:nrow(r_squared_8)
r_squared_8
##    Industry_Portfolios R_Squared
## 1                 Food 0.6996798
## 2                 Beer 0.4942521
## 3                Smoke 0.3492252
## 4                Games 0.7080587
## 5                Books 0.7138365
## 6                Hshld 0.6776486
## 7                Clths 0.5569650
## 8                 Hlth 0.6645048
## 9                Chems 0.7808480
## 10               Txtls 0.6945682
## 11               Cnstr 0.8403758
## 12               Steel 0.7568121
## 13               FabPr 0.8529388
## 14               ElcEq 0.8142165
## 15               Autos 0.6866316
## 16               Carry 0.7200590
## 17               Mines 0.4574915
## 18                Coal 0.4573826
## 19                 Oil 0.5860283
## 20                Util 0.5893746
## 21               Telcm 0.6038135
## 22               Servs 0.3399916
## 23               BusEq 0.7803378
## 24               Paper 0.7482788
## 25               Trans 0.7942989
## 26               Whlsl 0.6885572
## 27               Rtail 0.7491477
## 28               Meals 0.6199605
## 29                 Fin 0.8643518
## 30               Other 0.7145639
print(paste0("The average R Squared over the 30 Industry Portfolios is ", round(mean(r_squared_8$R_Squared), digits = 6), " under the Fama French 3 Factor Model"))
## [1] "The average R Squared over the 30 Industry Portfolios is 0.666807 under the Fama French 3 Factor Model"

49 Industry Portfolios

df_49 <- read.csv("C:\\Users\\zerva\\Documents\\AM14\\49_Industry_Portfolios.csv", header = TRUE, skip = 11)
df_49 <- df_49[1:1146,]

names(df_49)[names(df_49) == "X"] <- "Date"
df_49 <- as.data.frame(lapply(df_49, as.numeric))
df_49$Date <- parse_date_time(df_49$Date, "ym")
df_FF_49 <- df_FF[1:1146,]

df_combined_49 <- left_join(df_49, df_FF_49, by = "Date")
df_combined_49 <- df_combined_49 %>%
  mutate(MKT_RF_DIFF = Mkt_RF - RF)

head(df_combined_49)
##         Date Agric  Food   Soda  Beer Smoke  Toys   Fun Books Hshld Clths
## 1 1926-07-01  2.37  0.12 -99.99 -5.19  1.29  8.65  2.50 50.21 -0.48  8.08
## 2 1926-08-01  2.23  2.68 -99.99 27.03  6.50 16.81 -0.76 42.98 -3.58 -2.51
## 3 1926-09-01 -0.57  1.58 -99.99  4.02  1.26  8.33  6.42 -4.91  0.73 -0.51
## 4 1926-10-01 -0.46 -3.68 -99.99 -3.31  1.06 -1.40 -5.09  5.37 -4.68  0.12
## 5 1926-11-01  6.75  6.26 -99.99  7.29  4.55  0.00  1.82 -6.40 -0.54  1.87
## 6 1926-12-01 -3.27  0.18 -99.99 -4.09  2.55  2.48  2.14 -3.29  2.56  0.60
##     Hlth MedEq Drugs Chems  Rubbr Txtls BldMt  Cnstr Steel  FabPr Mach ElcEq
## 1 -99.99  4.95  0.91  8.14 -99.99  0.39  2.46  -8.21  4.07 -99.99 5.43  3.18
## 2 -99.99  4.20  4.26  5.50 -99.99  7.97  4.63   7.37  2.17 -99.99 2.06  2.10
## 3 -99.99  4.02 -0.26  5.33 -99.99  2.30 -0.11 -11.76  0.15 -99.99 0.36 -0.56
## 4 -99.99  3.47 -1.76 -4.76 -99.99  1.00 -4.42  -8.59 -3.85 -99.99 1.11 -5.73
## 5 -99.99  4.33  5.76  5.20 -99.99  3.10  2.20   2.25  3.86 -99.99 3.18  5.08
## 6 -99.99 -3.28  1.15  5.37 -99.99  6.39  3.60   0.76  3.75 -99.99 5.02 -2.47
##   Autos   Aero Ships   Guns   Gold Mines  Coal   Oil  Util Telcm  PerSv BusSv
## 1 16.39  -0.65  1.05 -99.99 -99.99  5.64  1.54 -1.40  7.04  0.83 -99.99  5.85
## 2  4.23  -6.58  1.80 -99.99 -99.99  0.55  0.85  3.69 -1.69  2.17 -99.99  1.51
## 3  4.83  -7.09  2.88 -99.99 -99.99  1.74  0.30 -3.69  2.04  2.41 -99.99  0.81
## 4 -7.93 -13.36 -5.45 -99.99 -99.99 -3.20  2.23 -1.04 -2.63 -0.11 -99.99  1.81
## 5 -0.66   6.61  7.89 -99.99 -99.99  8.46 -0.48  0.06  3.71  1.63 -99.99  0.78
## 6 10.49   9.17  2.39 -99.99 -99.99  1.83  1.43  2.94 -0.17  1.99 -99.99  5.16
##   Hardw  Softw Chips LabEq  Paper Boxes Trans  Whlsl Rtail Meals  Banks Insur
## 1  3.76 -99.99  1.31  1.35 -99.99  7.70  1.92 -23.79  0.07  1.87   4.61 -0.54
## 2  3.57 -99.99 -4.70  6.89 -99.99 -2.38  4.85   5.39 -0.75 -0.13  11.83  2.57
## 3  4.23 -99.99 -6.04 -0.63 -99.99 -5.54  0.08  -7.87  0.25 -0.56  -1.75  0.72
## 4 -2.47 -99.99 -3.92  0.21 -99.99 -5.08 -2.62 -15.38 -2.20 -4.11 -11.82 -4.28
## 5  3.28 -99.99  1.31  4.25 -99.99  3.84  1.61   4.67  6.52  4.33  -2.97  3.58
## 6  4.38 -99.99 -0.51 10.14 -99.99 -4.63  3.67   9.65  0.57  1.51  -1.06  3.27
##   RlEst   Fin Other Mkt_RF   SMB   HML   RF MKT_RF_DIFF
## 1  2.89 -5.77  5.20   2.96 -2.38 -2.73 0.22        2.74
## 2  5.30  0.32  6.76   2.64 -1.47  4.14 0.25        2.39
## 3 -3.06 -4.81 -3.86   0.36 -1.39  0.12 0.23        0.13
## 4 -5.74 -0.94 -8.49  -3.24 -0.13  0.65 0.32       -3.56
## 5  2.21  5.13  4.00   2.53 -0.16 -0.38 0.31        2.22
## 6  7.95 -1.59 -2.34   2.62 -0.02  0.00 0.28        2.34
r_squared_9 <- c()
realized_return_9 <- c()
pred_return_9 <- c()
reg_9 <- c()

for (i in 2:50) {
  reg_9[[i]] <- summary(lm(as.matrix(df_combined_49[i]) ~ as.matrix(MKT_RF_DIFF) + as.matrix(SMB) + as.matrix(HML) + as.matrix(RF), data = df_combined_49))
  pred_return_9[[i]] <- mean(reg_9[[i]]$coefficients[2,1] * df_combined_49$MKT_RF_DIFF)
  realized_return_9[[i]] <- mean(df_combined_49[ , i] - df_combined_49$RF)
  r_squared_9[[i]] <- reg_9[[i]]$adj.r.squared
}
pred_return_9 <- as.data.frame(as.matrix(pred_return_9))
pred_return_9 <- pred_return_9[-c(1),]
pred_return_9 <- as.data.frame(as.list(pred_return_9), col.names = names(df_combined_49[2:50]), row.names = "Predicted_Returns")
realized_return_9 <- as.data.frame(as.matrix(realized_return_9))
realized_return_9 <- realized_return_9[-c(1),]
realized_return_9 <- as.data.frame(as.list(realized_return_9), col.names = names(df_combined_49[2:50]), row.names = "Realized_Returns")
realized_vs_pred_return_9 <- rbind(pred_return_9, realized_return_9)
realized_vs_pred_return_9 <- as.data.frame(t(realized_vs_pred_return_9))
realized_vs_pred_return_9
##       Predicted_Returns Realized_Returns
## Agric         0.3690020        0.6897033
## Food          0.3147743        0.7044241
## Soda          0.2142908      -38.2995113
## Beer          0.3719956        0.9355323
## Smoke         0.2813273        0.8705585
## Toys          0.4407853        0.7620593
## Fun           0.5613260        1.0102792
## Books         0.4340629        0.7388133
## Hshld         0.3887106        0.6785079
## Clths         0.3220425        0.7021030
## Hlth          0.2863528      -44.7092234
## MedEq         0.3565161        0.8911693
## Drugs         0.3775667        0.8441187
## Chems         0.4548752        0.7954014
## Rubbr         0.3662613       -4.3570157
## Txtls         0.4186150        0.7131326
## BldMt         0.4729507        0.7568412
## Cnstr         0.5036145        0.8016492
## Steel         0.5339699        0.6712042
## FabPr         0.2392854      -38.5043717
## Mach          0.4988441        0.8294328
## ElcEq         0.5505322        0.9230279
## Autos         0.5232395        0.9452182
## Aero          0.5155849        1.1250611
## Ships         0.4516008        0.7391449
## Guns          0.2001709      -38.2834468
## Gold          0.1467824      -38.3938394
## Mines         0.3880681        0.7765794
## Coal          0.4681152        0.7653403
## Oil           0.3764917        0.7382112
## Util          0.3222166        0.6166579
## Telcm         0.2976187        0.5851134
## PerSv         0.3798745       -0.3957853
## BusSv         0.3622930        0.7523473
## Hardw         0.4887152        0.9697382
## Softw         0.3338181      -40.4757068
## Chips         0.5489626        0.9710995
## LabEq         0.4339411        0.9163525
## Paper         0.4328280       -2.0972513
## Boxes         0.4097480        0.8054887
## Trans         0.4406261        0.6747033
## Whlsl         0.4166664        0.5800000
## Rtail         0.4157324        0.8014834
## Meals         0.3815996        0.8206370
## Banks         0.4395205        0.8982199
## Insur         0.4726298        0.8107417
## RlEst         0.4639355        0.5896859
## Fin           0.5266363        0.8410733
## Other         0.4278090        0.4629058
ggplot(data = realized_vs_pred_return_9, aes(x = Predicted_Returns, y = Realized_Returns, label = rownames(realized_vs_pred_return_9))) +
  geom_point(size = 2) +
  geom_label_repel(fontface = 'bold', min.segment.length = 0, segment.color = 'black') +
  ggtitle("Predicted Vs. Realized Excess Returns") +
  theme(
    plot.title = element_text(size = 22, face = "bold", hjust = 0.5, vjust = 2.5),
    axis.title.x = element_text(size = 14, face = "bold", margin = margin(t = 15)),
    axis.title.y = element_text(size = 14, face = "bold", margin = margin(r = 15)),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11)
    ) +
  geom_abline() +
  scale_x_continuous(breaks = scales::pretty_breaks(n = 20), limits = c(0.0, 0.6)) +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 15), limits = c(0.00, 1.1)) +
  labs(y = "Realized Returns", x = "Predicted Returns") 

r_squared_9 <- as.data.frame(as.matrix(r_squared_9))
r_squared_9 <- r_squared_9[-c(1),]
r_squared_9 <- as.data.frame(as.list(r_squared_9), col.names = names(df_combined_49[2:50]), row.names = "R_Squared")
r_squared_9 <- as.data.frame(t(r_squared_9))
r_squared_9 <- cbind(Industry_Portfolios = rownames(r_squared_9), r_squared_9)
rownames(r_squared_9) <- 1:nrow(r_squared_9)
r_squared_9
##    Industry_Portfolios  R_Squared
## 1                Agric 0.42715718
## 2                 Food 0.66291402
## 3                 Soda 0.23948000
## 4                 Beer 0.49425207
## 5                Smoke 0.34922524
## 6                 Toys 0.49113612
## 7                  Fun 0.68209913
## 8                Books 0.60440792
## 9                Hshld 0.67764860
## 10               Clths 0.55696498
## 11                Hlth 0.19332690
## 12               MedEq 0.53771652
## 13               Drugs 0.62411791
## 14               Chems 0.78084801
## 15               Rubbr 0.07380009
## 16               Txtls 0.69456822
## 17               BldMt 0.82365613
## 18               Cnstr 0.64448573
## 19               Steel 0.75681215
## 20               FabPr 0.23899151
## 21                Mach 0.85092894
## 22               ElcEq 0.81421648
## 23               Autos 0.68663158
## 24                Aero 0.58762489
## 25               Ships 0.63643053
## 26                Guns 0.23947891
## 27                Gold 0.23250182
## 28               Mines 0.52800608
## 29                Coal 0.45738260
## 30                 Oil 0.58602833
## 31                Util 0.58937463
## 32               Telcm 0.60381347
## 33               PerSv 0.20305815
## 34               BusSv 0.50744614
## 35               Hardw 0.68531102
## 36               Softw 0.23877173
## 37               Chips 0.72156230
## 38               LabEq 0.68390508
## 39               Paper 0.17398330
## 40               Boxes 0.68816557
## 41               Trans 0.79349957
## 42               Whlsl 0.68855719
## 43               Rtail 0.74914772
## 44               Meals 0.61996047
## 45               Banks 0.64003796
## 46               Insur 0.65899283
## 47               RlEst 0.58425468
## 48                 Fin 0.83430303
## 49               Other 0.61008148
print(paste0("The average R Squared over the 49 Industry Portfolios is ", round(mean(r_squared_9$R_Squared), digits = 6), " under the Fama French 3 Factor Model"))
## [1] "The average R Squared over the 49 Industry Portfolios is 0.560144 under the Fama French 3 Factor Model"
r_df <- c(mean(r_squared_3$R_Squared), mean(r_squared_4$R_Squared), mean(r_squared_5$R_Squared), mean(r_squared_6$R_Squared), mean(r_squared_7$R_Squared), mean(r_squared_8$R_Squared), mean(r_squared_9$R_Squared))
ports <- c("Operating Profitability", "Investment", "Dividend Yield", "Momentum", "17 Industry", "30 Industry", "49 Industry")

r_df <- as.data.frame(r_df)
ports <- as.data.frame(ports)

r_df2 <- cbind(ports, r_df)
names(r_df2)[names(r_df2) == "ports"] <- "Portfolios"
names(r_df2)[names(r_df2) == "r_df"] <- "Average R Squared"
r_df2
##                Portfolios Average R Squared
## 1 Operating Profitability         0.8961896
## 2              Investment         0.8929981
## 3          Dividend Yield         0.8649321
## 4                Momentum         0.8525294
## 5             17 Industry         0.7405800
## 6             30 Industry         0.6668066
## 7             49 Industry         0.5601442

From the table above, we can see that the Fama French 3 Factor Model is able to price the operating profitability, investment, dividend yield, and momentum portfolios quite well. It even prices the 17 Industry Portfolios rather well, though notably not as well as the first four sets. The model starts to struggle when it’s passed portfolios from the 30 Industry and 49 Industry sets. This is likely partly due to the fact that there are significant outliers present in these two data sets. Additionally, these sets of portfolios incorporate an extremely wide range of industries. Not every industry behaves the same and different industries are subject to different sets of factors that affect their prices.

Part 1 (f)

I would add a monthly momentum factor which returns the value of the closing price on the last day of the month minus the closing price on the first day of the month. The resulting values will be either negative, positive, or zero. A positive value would indicate the portfolio has upwards momentum while a negative value would indicate the portfolio has a downwards momentum. A value of zero would indicate that the portfolio has no positive or negative momentum.

Part 1 (g)

The Fama-French three factor model is a good model, but it is certainly not as good as it gets. Other models that incorporate momentum, dividend payout rate, and other factors have been proven to be more accurate at pricing assets. It should also be noted that the Black-Scholes formula is considered the king of asset pricing formulas.

While the Fama-French three factor model does tend to outperform the CAPM, we should not forget about CAPM entirely. It is a very versatile pricing model which gives us insight as to how the portfolio is related to the market index without taking into account other factors.

Empirically, it has been shown that neither the three factor model nor the five factor model is the best model. In general, implementation of the Black-Scholes formula via Bayesian statistics is the best way forward.

The validity of the CAPM model can be tested empirically, but not with absolute rigor. The reason for this is because the CAPM formula is heavily dependent on the market portfolio. Over time, the market portfolio has changed drastically and so it is difficult to empirically test the CAPM model.

Part 2

Part 2 (a): Read in Data

library(readxl)
returns_daily <- read_excel("C:\\Users\\zerva\\Documents\\AM14\\PS1_Daily.xlsx", sheet = "HPR_daily", skip = 1)
prices_daily <- read_excel("C:\\Users\\zerva\\Documents\\AM14\\PS1_Daily.xlsx", sheet = "Prices_daily", skip = 1)

returns_daily$DATE <- parse_date_time(returns_daily$DATE, "ymd")
prices_daily$DATE <- parse_date_time(prices_daily$DATE, "ymd")
head(returns_daily)
## # A tibble: 6 x 9
##   DATE                    MSFT      XOM       GE      JPM     INTC        C
##   <dttm>                 <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
## 1 1990-01-02 00:00:00  0.0201   0        0.0349   0.00418  0.0435   0.0307 
## 2 1990-01-03 00:00:00  0.00563 -0.01    -0.00187  0.0333  -0.0278   0.0128 
## 3 1990-01-04 00:00:00  0.0294  -0.0101  -0.00563  0.00403  0.0214  -0.0126 
## 4 1990-01-05 00:00:00 -0.0245  -0.00510 -0.00943  0.00402 -0.00699  0.00851
## 5 1990-01-08 00:00:00  0.0153   0.0154   0.00571  0        0.0141   0.00844
## 6 1990-01-09 00:00:00 -0.00275 -0.0202  -0.0208  -0.032    0.0278  -0.0126 
## # ... with 2 more variables: VWRETD <dbl>, SPRTRN <dbl>

Part 2 (b): Calculate Log Returns

log_returns <- returns_daily %>%
                  mutate(log_MSFT = log(1 + MSFT)) %>%
                  mutate(log_XOM = log(1 + XOM)) %>%
                  mutate(log_GE = log(1 + GE)) %>%
                  mutate(log_JPM = log(1 + JPM)) %>%
                  mutate(log_INTC = log(1 + INTC)) %>%
                  mutate(log_C = log(1 + C)) %>%
                  mutate(log_VWRETD = log(1 + VWRETD)) %>%
                  mutate(log_SPRTRN = log(1 + SPRTRN))

head(log_returns)
## # A tibble: 6 x 17
##   DATE                    MSFT      XOM       GE      JPM     INTC        C
##   <dttm>                 <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
## 1 1990-01-02 00:00:00  0.0201   0        0.0349   0.00418  0.0435   0.0307 
## 2 1990-01-03 00:00:00  0.00563 -0.01    -0.00187  0.0333  -0.0278   0.0128 
## 3 1990-01-04 00:00:00  0.0294  -0.0101  -0.00563  0.00403  0.0214  -0.0126 
## 4 1990-01-05 00:00:00 -0.0245  -0.00510 -0.00943  0.00402 -0.00699  0.00851
## 5 1990-01-08 00:00:00  0.0153   0.0154   0.00571  0        0.0141   0.00844
## 6 1990-01-09 00:00:00 -0.00275 -0.0202  -0.0208  -0.032    0.0278  -0.0126 
## # ... with 10 more variables: VWRETD <dbl>, SPRTRN <dbl>, log_MSFT <dbl>,
## #   log_XOM <dbl>, log_GE <dbl>, log_JPM <dbl>, log_INTC <dbl>, log_C <dbl>,
## #   log_VWRETD <dbl>, log_SPRTRN <dbl>
log_returns <- subset(log_returns, select = -c(MSFT, XOM, GE, JPM, INTC, C, VWRETD, SPRTRN))
head(log_returns)
## # A tibble: 6 x 9
##   DATE                log_MSFT  log_XOM   log_GE  log_JPM log_INTC    log_C
##   <dttm>                 <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
## 1 1990-01-02 00:00:00  0.0199   0        0.0343   0.00418  0.0426   0.0302 
## 2 1990-01-03 00:00:00  0.00562 -0.0101  -0.00187  0.0328  -0.0282   0.0127 
## 3 1990-01-04 00:00:00  0.0290  -0.0102  -0.00564  0.00402  0.0212  -0.0127 
## 4 1990-01-05 00:00:00 -0.0248  -0.00512 -0.00948  0.00401 -0.00702  0.00847
## 5 1990-01-08 00:00:00  0.0152   0.0153   0.00570  0        0.0140   0.00840
## 6 1990-01-09 00:00:00 -0.00275 -0.0204  -0.0211  -0.0325   0.0274  -0.0126 
## # ... with 2 more variables: log_VWRETD <dbl>, log_SPRTRN <dbl>

Part 2 (c): ACF Plots for S&P 500 Composite Index Squared Returns and Absolute Returns

acf(log_returns$log_SPRTRN**2)

acf(abs(log_returns$log_SPRTRN))

Part 2 (d): Time-Varying Volatility Plot for Microsoft using Moving Average Model

library(roll)
## Warning: package 'roll' was built under R version 4.1.2
m_avg_10 <- as.data.frame(roll_sd(log_returns$log_MSFT, width = 50))
m_avg_10$Date <- log_returns$DATE
names(m_avg_10)[names(m_avg_10) == "roll_sd(log_returns$log_MSFT, width = 50)"] <- "MSFT_10"

m_avg_20 <- as.data.frame(roll_sd(log_returns$log_MSFT, width = 100))
m_avg_20$Date <- log_returns$DATE
names(m_avg_20)[names(m_avg_20) == "roll_sd(log_returns$log_MSFT, width = 100)"] <- "MSFT_20"

m_avg_combined <- left_join(m_avg_10, m_avg_20, by = "Date")
ggplot(data = m_avg_combined) +
  geom_line(aes(x = Date, y = MSFT_10, color = "10 week window"), size = 1) +
  geom_line(aes(x = Date, y = MSFT_20, color = "20 week window"), size = 1) +
  ggtitle("MSFT 10 Vs 20 Week Window") +
  theme(
    plot.title = element_text(size = 22, face = "bold", hjust = 0.5, vjust = 2.5),
    axis.title.x = element_text(size = 14, face = "bold", margin = margin(t = 15)),
    axis.title.y = element_text(size = 14, face = "bold", margin = margin(r = 15)),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 10.5),
    legend.key.height = unit(1, 'cm'),
    legend.key.width = unit(1.5, 'cm'),
    legend.position = "right", legend.margin=margin(l = 30)
    ) +
  labs(y = "MA of Volatility of Log Returns", x = "Date", colour = "Legend") +
  guides(color = guide_legend(override.aes = list(size = 1.5))) +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 10), limits = c(-0.01, 0.06))

In light of Modern Portfolio Theory, the results from the above chart indicate that at times, even very secure assets can exhibit noteworthy volatility and so one must choose appropriate windows with which to analyze said volatility.

Part 2 (e): Time-Varying Volatility Plot for S&P 500 Composite Index using Moving Average Model

library(roll)
m_avg_10_2 <- as.data.frame(roll_sd(log_returns$log_SPRTRN, width = 50))
m_avg_10_2$Date <- log_returns$DATE
names(m_avg_10_2)[names(m_avg_10_2) == "roll_sd(log_returns$log_SPRTRN, width = 50)"] <- "SPRTRN_10"

m_avg_20_2 <- as.data.frame(roll_sd(log_returns$log_SPRTRN, width = 100))
m_avg_20_2$Date <- log_returns$DATE
names(m_avg_20_2)[names(m_avg_20_2) == "roll_sd(log_returns$log_SPRTRN, width = 100)"] <- "SPRTRN_20"

m_avg_combined_2 <- left_join(m_avg_10_2, m_avg_20_2, by = "Date")
ggplot(data = m_avg_combined_2) +
  geom_line(aes(x = Date, y = SPRTRN_10, color = "10 week window"), size = 1) +
  geom_line(aes(x = Date, y = SPRTRN_20, color = "20 week window"), size = 1) +
  ggtitle("SPRTRN 10 Vs 20 Week Window") +
  theme(
    plot.title = element_text(size = 22, face = "bold", hjust = 0.5, vjust = 2.5),
    axis.title.x = element_text(size = 14, face = "bold", margin = margin(t = 15)),
    axis.title.y = element_text(size = 14, face = "bold", margin = margin(r = 15)),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 10.5),
    legend.key.height = unit(1, 'cm'),
    legend.key.width = unit(1.5, 'cm'),
    legend.position = "right", legend.margin=margin(l = 30)
    ) +
  labs(y = "MA of Volatility of Log Returns", x = "Date", colour = "Legend") +
  guides(color = guide_legend(override.aes = list(size = 1.5))) +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 10), limits = c(-0.01, 0.06))

We can see that the market index exhibits much less volatility than Microsoft, which is to be expected as the market portfolio will not have any firm-specific risk involved.

Part 2 (f): EWMA Model for S&P 500 Composite Index Log Returns

library(qcc)
## Warning: package 'qcc' was built under R version 4.1.2
## Package 'qcc' version 2.7
## Type 'citation("qcc")' for citing this R package in publications.
ewma(log_returns$log_SPRTRN, center = mean(log_returns$log_SPRTRN),lambda = 0.5, std.dev = 0.01)

## List of 15
##  $ call      : language ewma(data = log_returns$log_SPRTRN, center = mean(log_returns$log_SPRTRN),      std.dev = 0.01, lambda = 0.5)
##  $ type      : chr "ewma"
##  $ data.name : chr "log_returns$log_SPRTRN"
##  $ data      : num [1:6301, 1] 0.01764 -0.00259 -0.00865 -0.0098 0.0045 ...
##   ..- attr(*, "dimnames")=List of 2
##  $ statistics: Named num [1:6301] 0.01764 -0.00259 -0.00865 -0.0098 0.0045 ...
##   ..- attr(*, "names")= chr [1:6301] "1" "2" "3" "4" ...
##  $ sizes     : int [1:6301] 1 1 1 1 1 1 1 1 1 1 ...
##  $ center    : num 0.00028
##  $ std.dev   : num 0.01
##  $ x         : int [1:6301] 1 2 3 4 5 6 7 8 9 10 ...
##  $ y         : Named num [1:6301] 0.008961 0.003186 -0.002732 -0.006268 -0.000882 ...
##   ..- attr(*, "names")= chr [1:6301] "1" "2" "3" "4" ...
##  $ sigma     : num [1:6301] 0.005 0.00559 0.00573 0.00576 0.00577 ...
##  $ lambda    : num 0.5
##  $ nsigmas   : num 3
##  $ limits    : num [1:6301, 1:2] -0.0147 -0.0165 -0.0169 -0.017 -0.017 ...
##   ..- attr(*, "dimnames")=List of 2
##  $ violations: Named int [1:112] 151 164 265 476 1979 2030 2172 2189 2190 2191 ...
##   ..- attr(*, "names")= chr [1:112] "151" "164" "265" "476" ...
##  - attr(*, "class")= chr "ewma.qcc"
ewma(log_returns$log_SPRTRN, center = mean(log_returns$log_SPRTRN),lambda = 0.75, std.dev = 0.01)

## List of 15
##  $ call      : language ewma(data = log_returns$log_SPRTRN, center = mean(log_returns$log_SPRTRN),      std.dev = 0.01, lambda = 0.75)
##  $ type      : chr "ewma"
##  $ data.name : chr "log_returns$log_SPRTRN"
##  $ data      : num [1:6301, 1] 0.01764 -0.00259 -0.00865 -0.0098 0.0045 ...
##   ..- attr(*, "dimnames")=List of 2
##  $ statistics: Named num [1:6301] 0.01764 -0.00259 -0.00865 -0.0098 0.0045 ...
##   ..- attr(*, "names")= chr [1:6301] "1" "2" "3" "4" ...
##  $ sizes     : int [1:6301] 1 1 1 1 1 1 1 1 1 1 ...
##  $ center    : num 0.00028
##  $ std.dev   : num 0.01
##  $ x         : int [1:6301] 1 2 3 4 5 6 7 8 9 10 ...
##  $ y         : Named num [1:6301] 0.0133 0.00138 -0.00614 -0.00889 0.00116 ...
##   ..- attr(*, "names")= chr [1:6301] "1" "2" "3" "4" ...
##  $ sigma     : num [1:6301] 0.0075 0.00773 0.00775 0.00775 0.00775 ...
##  $ lambda    : num 0.75
##  $ nsigmas   : num 3
##  $ limits    : num [1:6301, 1:2] -0.0222 -0.0229 -0.023 -0.023 -0.023 ...
##   ..- attr(*, "dimnames")=List of 2
##  $ violations: Named int [1:131] 151 164 166 190 265 476 1565 1979 2030 2172 ...
##   ..- attr(*, "names")= chr [1:131] "151" "164" "166" "190" ...
##  - attr(*, "class")= chr "ewma.qcc"
ewma(log_returns$log_SPRTRN, center = mean(log_returns$log_SPRTRN),lambda = 0.94, std.dev = 0.01)

## List of 15
##  $ call      : language ewma(data = log_returns$log_SPRTRN, center = mean(log_returns$log_SPRTRN),      std.dev = 0.01, lambda = 0.94)
##  $ type      : chr "ewma"
##  $ data.name : chr "log_returns$log_SPRTRN"
##  $ data      : num [1:6301, 1] 0.01764 -0.00259 -0.00865 -0.0098 0.0045 ...
##   ..- attr(*, "dimnames")=List of 2
##  $ statistics: Named num [1:6301] 0.01764 -0.00259 -0.00865 -0.0098 0.0045 ...
##   ..- attr(*, "names")= chr [1:6301] "1" "2" "3" "4" ...
##  $ sizes     : int [1:6301] 1 1 1 1 1 1 1 1 1 1 ...
##  $ center    : num 0.00028
##  $ std.dev   : num 0.01
##  $ x         : int [1:6301] 1 2 3 4 5 6 7 8 9 10 ...
##  $ y         : Named num [1:6301] 0.0166 -0.00144 -0.00822 -0.00971 0.00365 ...
##   ..- attr(*, "names")= chr [1:6301] "1" "2" "3" "4" ...
##  $ sigma     : num [1:6301] 0.0094 0.00942 0.00942 0.00942 0.00942 ...
##  $ lambda    : num 0.94
##  $ nsigmas   : num 3
##  $ limits    : num [1:6301, 1:2] -0.0279 -0.028 -0.028 -0.028 -0.028 ...
##   ..- attr(*, "dimnames")=List of 2
##  $ violations: Named int [1:148] 151 164 166 265 476 1565 1940 1979 1980 2030 ...
##   ..- attr(*, "names")= chr [1:148] "151" "164" "166" "265" ...
##  - attr(*, "class")= chr "ewma.qcc"

Considering the plot above, a sensible choice for the variance would be 0.0001. As the variance is increased, the confidence intervals become wider. The same thing happens when lambda is increased. As lambda is increased, the more recent observations receive heavier and heavier weights as compared to earlier observations and so the confidence intervals become wider.